*** How worried should we be? The implications of fabricated survey data for political science
*** Figure A7. Distribution of Interviews by Day of Week

* Setup: disable output paging, move to the data folder, and load the dataset.

* Do not pause for --more-- while output scrolls.
set more off

* Set the working directory to the folder that contains the dataset.
* NOTE(review): the path below looks like a placeholder -- edit it to the
* actual location of VEN_fraud_data.dta before running.
cd "C:\~\Downloads\"

* Load the dataset, replacing whatever data are currently in memory.
use "VEN_fraud_data.dta", clear

* Parse the upload and interview-start timestamp strings (mask "MDYhm":
* month, day, year, hour, minute) into Stata datetime/C values, then derive
* the hour of day, calendar date, day of week, and month from them.
*
* Datetime values count milliseconds since 01jan1960 00:00:00 and must be
* stored as double. -generate- creates float variables by default, and a
* float cannot hold a millisecond count of this size exactly: each timestamp
* would be rounded by up to roughly a minute, which can push observations
* near an hour or midnight boundary into the wrong hour, day, or month.

* Upload timestamp and its hour of day (0-23).
gen double upload_time = Clock(upload, "MDYhm")
format upload_time %tC
gen upload_hour = hhC(upload_time)

* Interview start timestamp and its hour of day (0-23).
gen double start_time = Clock(vstart, "MDYhm")
format start_time %tC
gen start_hour = hhC(start_time)

* Calendar date and day of week of the interview start.
* dow() returns 0 = Sunday, ..., 6 = Saturday.
gen start_date = dofC(start_time)
format start_date %td
gen dayofweek = dow(start_date)

* Calendar date and day of week of the upload (the variable plotted in Figure A7).
gen upload_date = dofC(upload_time)
format upload_date %td
gen dayofweek_upload = dow(upload_date)

* Calendar month (1-12) of the upload.
gen upload_month = month(upload_date)

* Figure A7
* Overlay two histograms of the upload day of week: filled green bars for
* observations with clean_data == 1, and black outlined bars for observations
* with likelyfraud == 1. The day-of-week variable is coded with Sunday = 0.
#delimit ;
twoway
	(histogram dayofweek_upload if clean_data == 1,
		discrete color(green))
	(histogram dayofweek_upload if likelyfraud == 1,
		discrete fcolor(none) lcolor(black)),
	xtitle("Day of week (from upload time)")
	legend(order(1 "Full Clean Data" 2 "Fraudulent"))
	note("Sunday = 0")
	graphregion(color(white))
;
#delimit cr

